*** The file builds spending shares by across all relevant product modules
*** by price decile cells of the product space 

cd "D:\Dropbox\unequal_gains\main_data\HMS"
global db "D:\Dropbox\unequal_gains\main_data"
global Section4 "D:\Dropbox\unequal_gains\QJE revision plan\analysis\section4_data"

**** 1. Get total each household group * module-quality bin 

foreach i of numlist 2004(1)2006 {

use "$db/Important Price Datasets/price_index_household_income_`i'_2004_modules.dta", clear

collapse (sum) total_spending total_quantity [aw=projection_factor], by(upc upc_ver_uc product_module_code)
gen average_unit_price_all=total_spending/total_quantity

* bring in info on UPC price decile (within product module) in base period:
merge m:1 upc upc_ver_uc using "$db/Important Lists/upc_brand_size_final.dta"
keep if _merge==3
drop _merge
sum size1_amount, d
drop if size1_amount<r(p1) | size1_amount>r(p99)
gen average_unit_price_adj=average_unit_price_all/(size1_amount*multi)
* create quality ranks without weights
sort product_module_code average_unit_price_adj
bysort product_module_code: gen double temp=[_N]
bysort product_module_code: gen double temp2=[_n]
gen rank=temp2/temp*100
gen quality_rank=.
foreach r of numlist 1(1)10 {
replace quality_rank=`r' if rank<=`r'*10 & missing(quality_rank)
}

keep upc upc_ver_uc quality_rank
duplicates drop
save "$db/Important Price Datasets/UPCqualitydeciles_`i'.dta", replace
}

foreach i of numlist 2004(1)2006 {

* load data
use "$db/Important Price Datasets/price_index_household_income_`i'_2004_modules.dta", clear

* get the quality rank
merge m:1 upc upc_ver_uc using "$db/Important Price Datasets/UPCqualitydeciles_`i'.dta"
keep if _merge==3
drop _merge

* bring in household demographics
drop household_age
merge m:1 household_code using "$db/Important Datasets/panelists_`i'.dta"
keep if _merge==3 
drop _merge

* get age 
gen male_head_birth_temp=substr(male_head_birth,1,4) 
gen female_head_birth_temp=substr(female_head_birth,1,4)
gen male_head_birth_num=real(male_head_birth_temp)
gen female_head_birth_num=real(female_head_birth_temp)
gen male_head_age_num=`i'-male_head_birth_num
gen female_head_age_num=`i'-female_head_birth_num
gen household_age=(male_head_age_num+female_head_age_num)/2
replace household_age=male_head_age_num if missing(female_head_age_num)
replace household_age=female_head_age_num if missing(male_head_age_num)

rename household_age age 
drop if missing(age) 
gen age_bin=.
drop if age<20
replace age_bin = 1 if age<26
replace age_bin = 2 if age<31 & missing(age_bin)
replace age_bin = 3 if age<36 & missing(age_bin)
replace age_bin = 4 if age<41 & missing(age_bin)
replace age_bin = 5 if age<46 & missing(age_bin)
replace age_bin = 6 if age<51 & missing(age_bin)
replace age_bin = 7 if age<56 & missing(age_bin)
replace age_bin = 8 if age<61 & missing(age_bin)
replace age_bin = 9 if age<66 & missing(age_bin)
replace age_bin = 10 if age<71 & missing(age_bin)
replace age_bin = 11 if age<76 & missing(age_bin)
replace age_bin = 12 if age>75 

* get education 
gen education=max(male_head_education,female_head_education)
drop if education==0
rename education education_old
gen education=.
* below high school
replace education=1 if education_old==1 | education_old==2
* high school graduate
replace education=2  if education_old==3
* some college 
replace education=3 if education_old==4
* college grad 
replace education=4 if education_old==5 
* post college grad 
replace education=5 if education_old==6 

* get race 
drop if missing(race)
rename race race_old
gen race=.
* White
replace race=1 if race_old==1
* Black
replace race=2 if race_old==2
* Other
replace race=3 if race_old>2

* get presence of children
drop if missing(age_and_presence_of_children) 
gen children=.
replace children=1 if age_and_presence_of_children<9
replace children=0 if age_and_presence_of_children==9

* collapse product module total expenditures by group WITH projection weights
collapse (sum) total_spending [fw=projection_factor], ///
by(education fips_state_code age_bin race children product_module_code quality_rank) fast
save "$Section4/`i'_spending_by_all_HH_group.dta", replace 
}

**** 2. Now compute spending shares across all three years by group 
use "$Section4/2004_spending_by_all_HH_group.dta", clear
append using "$Section4/2005_spending_by_all_HH_group.dta"
append using "$Section4/2006_spending_by_all_HH_group.dta"

collapse (sum) total_spending, by(product_module_code quality_rank fips_state_code age_bin education race children) fast
save "$Section4/age_educ_race_children_state_shares", replace

* record list of HH groups that we have in Nielsen data
keep fips_state_code age_bin education race children
rename fips_state_code statefip 
duplicates drop
save "$Section4/age_educ_race_children_state_groups_used", replace
